## Executed March 2, 2024, 
## using following software:
## (1) 
### R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
### Copyright (C) 2023 The R Foundation for Statistical Computing
### Platform: x86_64-pc-linux-gnu (64-bit)
## (2) 
### RStudio 2023.06.0+421 "Mountain Hydrangea" Release
### (583b465ecc45e60ee9de085148cd2f9741cc5214, 2023-06-06) 
### for Ubuntu Focal Mozilla/5.0 (X11; Linux x86_64) 
### AppleWebKit/537.36 (KHTML, like Gecko) rstudio/2023.06.0+421 
### Chrome/110.0.5481.208 Electron/23.3.0 Safari/537.36
## (3) Ubuntu 22.04.4 LTS (64 bit)

## Clean Up Space
rm(list=ls())

## Set working directory to the location of this file (through RStudio)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Required Packages
library(stringr)
library(quanteda)
library(quanteda.textstats)
library(seededlda)

## Function extracting difference-in-difference estimates
exteff <- function(m, vcov, tr, mod, modval) {
  mc <- coeftest(m, vcov. = vcov)
  ## Exporting marginal effects and SEs
  out <- data.frame(modval = modval, est = NA, se = NA)
  if (all(!modval%in%"subtract")) {
    for(i in 1:length(modval)) {
      if (length(which(rownames(mc)==paste0(mod,modval[i])))==0) {
        out$est[i] <- mc[which(rownames(mc)==tr),1]
        out$se[i] <- mc[which(rownames(mc)==tr),2]
      } else {
        out$est[i] <- mc[which(rownames(mc)==tr),1] + 
          mc[which(rownames(mc)==paste0(tr,":",mod,modval[i])),1]
        out$se[i] <- sqrt(
          mc[which(rownames(mc)==tr),2]^2 + 
            1^2 * 
            mc[which(rownames(mc)==paste0(tr,":",mod,modval[i])),2]^2 + 
            2 * 1 * 
            vcov[which(rownames(mc)==tr),
                 which(rownames(mc)==paste0(tr,":",mod,modval[i]))]
        )
      }
    }
  ## Exporting difference in coefficients and SEs  
  } else if (length(modval)==1 & all(modval=="subtract")) {
    out <- data.frame(modval = tr, est = NA, se = NA)
    for(i in 1:length(tr)) {
      ## Reference Category vs mod
      if (tr[i]=="REFERENCE") {
        out$est[i] <- (-1)*mc[which(rownames(mc)==mod),1]
        out$se[i] <- mc[which(rownames(mc)==mod),2]
        ## Just create zero  
      } else if (tr[i]=="SAME") {
        out$est[i] <- 0
        out$se[i] <- NA
        ## All else
      } else {
        out$est[i] <- mc[which(rownames(mc)==tr[i]),1] + 
          (-1)*mc[which(rownames(mc)==mod),1]
        out$se[i] <- sqrt(
          mc[which(rownames(mc)==tr[i]),2]^2 + 
            (-1)^2 * mc[which(rownames(mc)==mod),2]^2 + 
            2 * (-1) * vcov[which(rownames(mc)==tr[i]),
                            which(rownames(mc)==mod)]
        )
      }
    }
  }
  out$lci90 <- out$est - out$se*qnorm(0.95)
  out$uci90 <- out$est + out$se*qnorm(0.95)
  out$lci95 <- out$est - out$se*qnorm(0.975)
  out$uci95 <- out$est + out$se*qnorm(0.975)
  return(out)
}

## Read in Data (Drop years prior to 1992)
load("ungd_files.RData")
d <- subset(ungd_files, Year >=1992)
rm(ungd_files)

## Character vectors that identify state groups
CAstates <- c("KAZ","UZB","TKM","KGZ","TJK")
NonCAstates <- c("MDA","ARM","AZE",
                 "EST","LVA","LTU")
Balticstates <- c("EST","LVA","LTU")
Caucasusstates <- c("MDA","ARM","AZE")
Belarus <- c("BLR")
Georgia <- c("GEO")
Ukraine <- c("UKR")
Russia <- c("RUS")
### Setting colors attached to each group
clrscale <- c("#d95f02","#1b9e77","#666666",
              "#7570b3","#66a61e",
              "#e7298a","#a6761d","#e6ab02")

## Create Character Vector of Speech
tvec_tgt <- d$text %>% str_squish
## typo fix
tvec_tgt <- gsub("soverign","sovereign",tvec_tgt)

## Identify person, location, and organization entities 
### (RAN BEFOREHAND, SINCE IT TAKES TIME)
## if (!require("pacman")) install.packages("pacman")
## pacman::p_load_gh("trinker/entity")
# library(entity)
# psn_ent <- sort(unique(unlist(person_entity(tvec_tgt))))
# loc_ent <- sort(unique(unlist(location_entity(tvec_tgt))))
# org_ent <- sort(unique(unlist(organization_entity(tvec_tgt))))
# save(psn_ent,loc_ent,org_ent, file="ungd_entities.RData")
### (END)
load("ungd_entities.RData")

## Generate Document Level Corpus
cs_tgt_doc <- corpus(tvec_tgt, docnames=d$doc_id) 
head(cs_tgt_doc)
## Generate Sentence Level Corpus
cs_tgt <- cs_tgt_doc %>% corpus_reshape("sentences")
head(cs_tgt)

## Tokenize Text (at a sentence level)
### * Remove punctuations, symbols, numbers, URLs
### * Remove entities
### * Remove stop words
tk_tgt_0 <- tokens(cs_tgt,  
                 remove_punct = TRUE, remove_symbols = TRUE, 
                 remove_numbers = TRUE, remove_url = TRUE)  %>% 
  tokens_compound(c(psn_ent,loc_ent,org_ent)[grep(" ",c(psn_ent,loc_ent,org_ent))]) %>%
  tokens_remove(gsub(" ","_",c(psn_ent,loc_ent,org_ent))) %>% 
  tokens_remove(stopwords("en")) 

## Collocations List
### (RAN BEFOREHAND, SINCE IT TAKES TIME)
# mwlist <- textstat_collocations(cs_tgt)
# saveRDS(mwlist, "unga_mwlist.rds")
### (END)
mwlist <- readRDS("unga_mwlist.rds")

## Limit to those associated statistically significantly (p<0.001)
## and remove the ones that contain stopwords
mwlist <- mwlist[which(mwlist[,"z"]>qnorm(0.9995)),]
mwlist <- mwlist[-grep(paste0(paste(paste0(" ",stopwords("en")), collapse = "$|"),"$"),mwlist$collocation),]
mwlist <- mwlist[-grep(paste0("^",paste(paste0(stopwords("en")," "), collapse = "|^")),mwlist$collocation),]

## Check words that are relevant to sovereignty frame
mwlist[grep("sovereignty",mwlist$collocation),]
mwlist[grep("sovereign ",mwlist$collocation),]
mwlist[grep("territorial integrity",mwlist$collocation),]
mwlist[grep("autonomous",mwlist$collocation),]
mwlist[grep("autonomy",mwlist$collocation),]
mwlist[grep("independent",mwlist$collocation),]
mwlist[grep("independence",mwlist$collocation),]
mwlist[grep("self-determination",mwlist$collocation),]
mwlist[grep("self-government",mwlist$collocation),]
mwlist[grep("self-governance",mwlist$collocation),]
mwlist[grep("interfere",mwlist$collocation),]
mwlist[grep("non-intervention",mwlist$collocation),]

## Further limit collocations to those with z score >= 100
mwbasic <- mwlist[which(mwlist[,"z"]>=100),]
## Make sure to add collocations that is related to sovereignty frame
mwextra <- c("right to exist",
             "territorial integrity")

## Sentence Level Data, tokenized, collocations compounded
tk_tgt <- tk_tgt_0 %>% 
  tokens_compound(mwbasic) %>% 
  tokens_compound(pattern=phrase(mwextra))
rm(tk_tgt_0)
dfm_tgt <- tk_tgt %>% dfm() %>% 
  dfm_trim(min_termfreq = 10)

## Dictionary of Seed Words for "Sovereignty" Frame
seedwords <- c("sovereign","sovereignty","sovereignly",
               "territorial_integrity",
               "autonomy","autonomous", 
               "independence","independent","independent_state",
               "self-determination",
               "self-governance","self-government", 
               "interfere", "interference", "right_to_exist",
               "non-intervention")
dicseed <- dictionary(list(sovereignty=seedwords))

## Divergence Scores K=3 through K=60
### (RAN BEFOREHAND, SINCE IT TAKES TIME)
# divscores <- data.frame(k=c(3:60),ad=NA,hi=NA,me=NA,lo=NA)
# for(k in 2:59) {
#   set.seed(k+34567)
#   tmp <- textmodel_seededlda(dfm_tgt, dicseed, residual=k, gamma = 0.5,
#                        batch_size=0.01, auto_iter=TRUE, verbose=TRUE)
#   divscores$ad[k-1] <- divergence(tmp, regularize = FALSE)
#   divscores$hi[k-1] <- divergence(tmp, min_size=0.01)
#   divscores$me[k-1] <- divergence(tmp, min_size=0.05)
#   divscores$lo[k-1] <- divergence(tmp, min_size=0.1)
#   print(paste("K =",k+1,":",divscores[k-1,]))
#   saveRDS(divscores, "divscores.rds")
# }
### (END)
divscores <- readRDS("divscores.rds")
head(divscores)

## Max score locations
divscores$k[which.max(divscores$ad)]
divscores$k[which.max(divscores$hi)]
divscores$k[which.max(divscores$me)]
divscores$k[which.max(divscores$lo)]
divscores$admax <- ifelse(divscores$ad==max(divscores$ad),divscores$ad,NA)
divscores$himax <- ifelse(divscores$hi==max(divscores$hi),divscores$hi,NA)
divscores$memax <- ifelse(divscores$me==max(divscores$me),divscores$me,NA)
divscores$lomax <- ifelse(divscores$lo==max(divscores$lo),divscores$lo,NA)

## Visualizing Optimal Number of Topics
## (THIS IS FIGURE A1 in ONLINE APPENDIX B)
require(ggplot2)
ggplot(divscores, aes(x=k)) + 
  geom_point(aes(y=ad, shape="1", color="1")) + 
  geom_point(aes(y=hi, shape="2", color="2")) + 
  geom_point(aes(y=me, shape="3", color="3")) + 
  geom_point(aes(y=lo, shape="4", color="4")) + 
  geom_point(aes(y=admax), shape=21, color="black", fill="black") + 
  geom_point(aes(y=himax), shape=22, color="#1b9e77", fill="#1b9e77") + 
  geom_point(aes(y=memax), shape=23, color="#d95f02", fill="#d95f02") + 
  geom_point(aes(y=lomax), shape=24, color="#7570b3", fill="#7570b3") + 
  coord_cartesian(ylim=c(0,0.6)) + 
  scale_shape_manual(name="", labels=c("AD",
                                       bquote("RD ("~ delta == 0.01~")"),
                                       bquote("RD ("~ delta == 0.05~")"),
                                       bquote("RD ("~ delta == 0.1~")")),
                     values = c(1,0,5,2)) +
  scale_color_manual(name="", labels=c("AD",
                                       bquote("RD ("~ delta == 0.01~")"),
                                       bquote("RD ("~ delta == 0.05~")"),
                                       bquote("RD ("~ delta == 0.1~")")),
                     values = c("black","#1b9e77","#d95f02","#7570b3")) +
  labs(y="Divergence", x="Number of topics (K)") + 
  theme_bw() + 
  theme(legend.position = c(0.9,0.2))
ggsave("slda_optk.pdf", width=6, height=4)

## High topic granularity optimal
### (RAN BEFOREHAND, SINCE IT TAKES TIME)
# divscores$k[which.max(divscores$hi)] # 31 topics
# set.seed(30+34567)
# slda_hi_opt <- textmodel_seededlda(dfm_tgt, dicseed, residual=30, gamma = 0.5,
#                        batch_size=0.01, auto_iter=FALSE, verbose=TRUE)
# saveRDS(slda_hi_opt, "slda_hi_opt.rds")
### (END)
slda_hi_opt <- readRDS("slda_hi_opt.rds")

## Document Feature Matrix Creation
dfm_tgt_keys <- tk_tgt %>% 
  tokens_select(seedwords) %>% 
  dfm() %>%
  convert(to="data.frame")
dfm_tgt_keys$key_all <- (rowSums(dfm_tgt_keys[,-1])>0)*1
dfm_tgt_keys$sentence_id <- dfm_tgt_keys$doc_id
dfm_tgt_keys$doc_id <- gsub("\\..*$", "", dfm_tgt_keys$sentence_id)

## Add Seeded LDA Probabilities 
all(names(slda_hi_opt$theta[,1])==dfm_tgt_keys$sentence_id)
dfm_tgt_keys$slda_hi <- as.numeric(slda_hi_opt$theta[,1])*100
## cor(dfm_tgt_keys$key_all, dfm_tgt_keys$slda_hi)

## Remove Seeded LDA results
rm(slda_hi_opt)

## Generate Grand Dataset
dsov <- merge(d, dfm_tgt_keys)

## Weighting Variable (Inverse of Speech Length)
dsov$w <- NA
cy <- paste(dsov$Country,dsov$Year)
for(c in unique(cy)) {
  tmp <- length(which(cy==c))
  dsov$w[which(cy==c)] <- (1/tmp)*100
  cat(paste0(c," "))
}
rm(cy,tmp)
## hist(dsov$w)

##########################
## Descriptive Analysis ##
##########################

## Trend for CA states
dsovca <- subset(dsov, Country %in% CAstates) 
dsovca_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovca[,c("slda_hi","w")], dsovca$Year,
           function(d) weighted.mean(d$slda_hi,d$w))),
    Year = unique(dsovca$Year))
dsovca_annual$Region <- "Central Asia (CA)"

## Trend for non-CA post-Soviet states
dsovps <- subset(dsov, Country %in% c(NonCAstates)) #
dsovps_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovps[,c("slda_hi","w")], dsovps$Year,
           function(d) weighted.mean(d$slda_hi,d$w))),
    Year = unique(dsovps$Year))
dsovps_annual$Region <- "Non-CA post-Soviet\n(excl. Russia, Belarus, Ukraine, Georgia)"

## Trend for non-post-Soviet states
dsovelse <- subset(dsov, !Country %in% c(CAstates,NonCAstates,Ukraine,Georgia,Belarus,Russia)) 
dsovelse_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovelse[,c("slda_hi","w")], dsovelse$Year,
           function(d) weighted.mean(d$slda_hi,d$w))),
    Year = unique(dsovelse$Year))
dsovelse_annual$Region <- "Non-Post-Soviet"
dsovelse_annualy <- dsovelse_annual
dsovelse_annualy$Region <- "Non-Post-Soviet"

## Binding all data into one dataset
dsov_annual_cb <- rbind(dsovca_annual, dsovps_annual, 
                        dsovelse_annual)
dsov_annual_cb$Region <- factor(dsov_annual_cb$Region,
                                   levels=unique(dsov_annual_cb$Region))

## Plot trends in sovereignty frame appearances
## (THIS IS FIGURE A2 in ONLINE APPENDIX B)
require(ggplot2)
ggplot(dsov_annual_cb, aes(x=Year,y=x)) + 
  geom_vline(aes(xintercept=2008), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2014), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2022), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_point(aes(color=Region,shape=Region), size=2.5) + 
  geom_line(aes(color=Region,linetype=Region)) + 
  scale_x_continuous(breaks=c(1992,2000,2008,2010,2014,2020,2022)) +
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_linetype_manual(name="Region", values=c(1,2,3)) + 
  scale_color_manual(name="Region", values=clrscale[1:3]) + 
  labs(y = "Average proportion of sovereignty frame topic\nin a given speech sentene",
       x = "Year of UNGA (General Debate is in September)",
       caption = 
         paste0("Note 1: Central Asian states include Kazakhstan, Kyrgyzstan, Tajikistan, Turkmenistan, and Uzbeistan.\n",
                "Note 2: Non-CA post-Soviet states include Moldova, Armenia, Azerbaijan, Estonia, Latvia, and Lithuania.\n",
                "Note 3: Proportion is inversely weighted by speech length so that each speech is treated equally.")) + 
  theme_classic() + 
  theme(legend.position = c(0.3,0.8),
        legend.background = element_rect(color="black"),
        plot.title = element_text(hjust=0.5),
        plot.caption = element_text(hjust=0, size=10),
        plot.caption.position = "plot")

ggsave("trend_analysis_main_slda_hi_v3.pdf", 
       width = 9, height = 6)

########################################
## Difference in difference estimator ##
########################################

## Country Group Variable (excl. Ukraine, Georgia, Belarus, Russia)
dsov$cgr <- NA
dsov$cgr[which(dsov$Country%in%CAstates)] <- "Central Asia (CA)"
dsov$cgr[which(dsov$Country%in%NonCAstates)] <- "Non-CA post-Soviet"
dsov$cgr[which(!dsov$Country%in%c(CAstates,NonCAstates,Ukraine,Georgia,Belarus,Russia))] <- 
  "Non-post-Soviet"
dsov$cgrx <- factor(dsov$cgr, levels = c("Non-post-Soviet", 
                                        "Central Asia (CA)", 
                                        "Non-CA post-Soviet"))
## Russian adventurism year variable
dsov$tryear <- ifelse(dsov$Year %in% c(2008, 2014, 2022), 1, 0)


##############################
## Estimations (Main Model) ##
##############################

require(sandwich)
require(lmtest)

## Main Model (1997-2022)
ma <- lm(slda_hi ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, !Year %in% c(1992:1996)),
          weights = w)
mac_vcov <- vcovCL(ma, subset(dsov, !Year %in% c(1992:1996))$Country)
mac <- coeftest(ma, vcov.=mac_vcov)
## Main Model (2006-7 vs 2008)
m08 <- lm(slda_hi ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2006,
                                         2007,2008)), 
           weights = w)
m08c_vcov <- vcovCL(m08, subset(dsov, Year %in% c(2006,
                                                  2007,2008))$Country)
m08c <- coeftest(m08, vcov.=m08c_vcov)
## Main Model (2012-13 vs 2014)
m14 <- lm(slda_hi ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2012,
                                         2013,2014)),
           weights = w)
m14c_vcov <- vcovCL(m14, subset(dsov, Year %in% c(2012,
  2013,2014))$Country)
m14c <- coeftest(m14, vcov.=m14c_vcov)
## Main Model (2020-21 vs 2022)
m22 <- lm(slda_hi ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2020,
                                         2021,2022)),
           weights = w)
m22c_vcov <- vcovCL(m22, subset(dsov, Year %in% c(2020,
                                                  2021,2022))$Country)
m22c <- coeftest(m22, vcov.=m22c_vcov)

## Presenting results in a table
## (THIS IS TABLE A2 in ONLINE APPENDIX B)
require(texreg)
screenreg(list(ma,m08,m14,m22), digits = 3,
          override.se = list(mac[,2],m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(mac[,4],m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
          custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(ma,m08,m14,m22), digits = 3,
          override.se = list(mac[,2],m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(mac[,4],m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
       custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_main_slda_hi_v3.tex")

## Extracting Marginal Effects
## for 1997-2022 model
maceff <- exteff(ma, mac_vcov, 
       "tryear", "cgrx", 
       levels(dsov$cgrx))
maceff$m <- "All years\n(1997~2022)"
## for 2006-7 vs 2008 model
m08eff <- exteff(m08, m08c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m08eff$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
## for 2012-13 vs 2014 model
m14eff <- exteff(m14, m14c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m14eff$m <- "Crimea Annexation\n(2012-13 vs 2014)"
## for 2020-21 vs 2022 model
m22eff <- exteff(m22, m22c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m22eff$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Binding all results 
malleff <- rbind(maceff,m08eff,m14eff,m22eff)
malleff$modval <- factor(malleff$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malleff$m <- factor(malleff$m, levels = unique(malleff$m))
malleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 1997-2022 model
macdif <- exteff(ma, mac_vcov, 
       c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
       "tryear:cgrxCentral Asia (CA)",
       "subtract")
macdif$modval <- maceff$modval
macdif$m <- "All years\n(1997~2022)"
## for 2006-7 vs 2008 model
m08dif <- exteff(m08, m08c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m08dif$modval <- m08eff$modval
m08dif$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
## for 2012-13 vs 2014 model
m14dif <- exteff(m14, m14c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m14dif$modval <- m14eff$modval
m14dif$m <- "Crimea Annexation\n(2012-13 vs 2014)"
## for 2020-21 vs 2022 model
m22dif <- exteff(m22, m22c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m22dif$modval <- m22eff$modval
m22dif$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Binding all results 
malldif <- rbind(macdif,m08dif,m14dif,m22dif)
malldif$modval <- factor(malldif$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malldif$m <- factor(malldif$m, levels = unique(malldif$m))
malldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(malldif[,1],round(malldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# malleffdif <- rbind(malleff,malldif)
# malleffdif$type <- as.factor(malleffdif$type)

## Plot difference-in-difference estimates
## (THIS IS FIGURE A3 in ONLINE APPENDIX B)
require(ggplot2)
ggplot(malleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,2,3)]) + 
  facet_grid(m~type) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the proportion of ",
                  "sovereignty frame topic in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: Non-CA post-Soviet states exclude Russia, Belarus, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_main_slda_hi_v3.2.pdf", 
       width = 8, height = 5)
